from StringIO import StringIO
import gzip, urllib2
from BeautifulSoup import BeautifulSoup as bs


def get(a):
    """Download URL *a* and return the response body as a byte string.

    The request advertises gzip support; when the server answers with a
    gzip-encoded body it is decompressed before being returned.

    Errors raised by ``urllib2.urlopen`` propagate to the caller.
    """
    request = urllib2.Request(a)
    request.add_header('Accept-encoding', 'gzip')
    response = urllib2.urlopen(request)
    # try/finally rather than ``with``: the urllib2 response object is not a
    # context manager, and the connection must be released even if read() fails.
    try:
        encoding = response.info().get('Content-Encoding')
        payload = response.read()
    finally:
        response.close()
    # Content-coding tokens are case-insensitive, so accept e.g. "GZIP" too.
    if (encoding or '').lower() == 'gzip':
        payload = gzip.GzipFile(fileobj=StringIO(payload)).read()
    return payload


# Crawl a tutorial by following each page's rel="next" link: every raw page
# is saved under e:/runoob/java/ and the article bodies are concatenated
# into a single all.html.

a = 'http://www.runoob.com/java/java-tutorial.html'  # URL of the page to fetch
d = 'http://www.runoob.com/java/'  # URL prefix stripped off to get a file name
c = []  # file names reached through "next" links, in discovery order

# File names already fetched or queued.  Seeded with the start page so that a
# "next" chain looping back to it stops instead of downloading it twice.
seen = set([a[len(d):]])
bodies = []  # one rendered article-body fragment per page

while True:
    page = get(a)
    # Keep a copy of the raw page, named after the part of the URL after `d`.
    with open('e:/runoob/java/' + a[len(d):], 'w') as f:
        f.write(page)
    soup = bs(page)
    body = soup.find('div', attrs={'class': 'article-body'})
    # Skip pages without an article body; str(None) would otherwise put the
    # literal text "None" into the combined document.
    if body is not None:
        bodies.append(str(body))
    next_link = soup.find(rel="next")
    if next_link is None:
        break
    a = next_link['href']
    e = a[len(d):]
    print(e)
    if e in seen:
        # The chain has cycled back to a page that was already handled.
        break
    seen.add(e)
    c.append(e)

# Join once at the end instead of growing a string inside the loop.
strs = ''.join(bodies)

print(c)
with open('e:/runoob/java/all.html', 'w') as f:
    f.write(strs)

